Date in Pandas


In [2]:
import pandas as pd
import numpy as np

Timestamp


In [3]:
pd.Timestamp('9/1/2016 10:05AM')


Out[3]:
Timestamp('2016-09-01 10:05:00')

Period


In [4]:
pd.Period('1/2016')


Out[4]:
Period('2016-01', 'M')

In [5]:
pd.Period('3/5/2016')


Out[5]:
Period('2016-03-05', 'D')

DatetimeIndex


In [8]:
t1 = pd.Series(list('abc'), [pd.Timestamp('2016-09-01'),
                            pd.Timestamp('2016-09-02'), pd.Timestamp('2016-09-03')])
t1


Out[8]:
2016-09-01    a
2016-09-02    b
2016-09-03    c
dtype: object

In [9]:
type(t1.index)


Out[9]:
pandas.tseries.index.DatetimeIndex

PeriodIndex


In [10]:
t2 = pd.Series(list('def'), [pd.Period('2016-09'), pd.Period('2016-10'), pd.Period('2016-11')])
t2


Out[10]:
2016-09    d
2016-10    e
2016-11    f
Freq: M, dtype: object

In [11]:
type(t2.index)


Out[11]:
pandas.tseries.period.PeriodIndex

Converting to Datetime


In [14]:
d1 = ['2 June 2013', 'Aug 29, 2014', '2015-06-26', '7/12/16']
ts3 = pd.DataFrame(data=np.random.randint(10,100,(4,2)), index=d1, columns=list('ab'))
ts3


Out[14]:
a b
2 June 2013 60 73
Aug 29, 2014 58 30
2015-06-26 91 74
7/12/16 86 71

In [16]:
ts3.index = pd.to_datetime(ts3.index)
ts3


Out[16]:
a b
2013-06-02 60 73
2014-08-29 58 30
2015-06-26 91 74
2016-07-12 86 71

In [17]:
pd.to_datetime('4.7.12', dayfirst=True)


Out[17]:
Timestamp('2012-07-04 00:00:00')

Timedeltas


In [18]:
pd.Timestamp('9/3/2016') - pd.Timestamp('9/1/2016')


Out[18]:
Timedelta('2 days 00:00:00')

In [20]:
pd.Timestamp('9/2/2016 8:10AM') + pd.Timedelta('12D 3H')


Out[20]:
Timestamp('2016-09-14 11:10:00')

Working with Dates in Dataframe


In [22]:
dates = pd.date_range('10-01-2016', periods=9, freq='2W-SUN')
dates


Out[22]:
DatetimeIndex(['2016-10-02', '2016-10-16', '2016-10-30', '2016-11-13',
               '2016-11-27', '2016-12-11', '2016-12-25', '2017-01-08',
               '2017-01-22'],
              dtype='datetime64[ns]', freq='2W-SUN')

In [23]:
df = pd.DataFrame({'Count 1': 100 + np.random.randint(-5, 10, 9).cumsum(),
                  'Count 2': 120 + np.random.randint(-5, 10, 9)}, index=dates)
df


Out[23]:
Count 1 Count 2
2016-10-02 106 119
2016-10-16 105 128
2016-10-30 106 128
2016-11-13 112 129
2016-11-27 121 115
2016-12-11 129 127
2016-12-25 127 121
2017-01-08 128 120
2017-01-22 136 129

In [25]:
df.index.weekday_name


Out[25]:
array(['Sunday', 'Sunday', 'Sunday', 'Sunday', 'Sunday', 'Sunday',
       'Sunday', 'Sunday', 'Sunday'], dtype=object)

In [26]:
df.diff()


Out[26]:
Count 1 Count 2
2016-10-02 NaN NaN
2016-10-16 -1.0 9.0
2016-10-30 1.0 0.0
2016-11-13 6.0 1.0
2016-11-27 9.0 -14.0
2016-12-11 8.0 12.0
2016-12-25 -2.0 -6.0
2017-01-08 1.0 -1.0
2017-01-22 8.0 9.0

In [27]:
df.resample('M').mean()


Out[27]:
Count 1 Count 2
2016-10-31 105.666667 125.0
2016-11-30 116.500000 122.0
2016-12-31 128.000000 124.0
2017-01-31 132.000000 124.5

In [30]:
df['2017']


Out[30]:
Count 1 Count 2
2017-01-08 128 120
2017-01-22 136 129

In [31]:
df['2016-12']


Out[31]:
Count 1 Count 2
2016-12-11 129 127
2016-12-25 127 121

In [32]:
df['2016-12':]


Out[32]:
Count 1 Count 2
2016-12-11 129 127
2016-12-25 127 121
2017-01-08 128 120
2017-01-22 136 129

In [40]:
df.asfreq('M', method='ffill')


Out[40]:
Count 1 Count 2
2016-10-31 106 128
2016-11-30 121 115
2016-12-31 127 121

In [44]:
import matplotlib.pyplot as plt
%matplotlib inline
df.plot()


Out[44]:
<matplotlib.axes._subplots.AxesSubplot at 0x232e3d37f28>

In [ ]: